Brian M. Schilder, Bioinformatician II
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] 500
##
## $resolution
## [1] 0.6
##
## $resultsPath
## [1] "./Results"
##
## $nCores
## [1] 2
##
## $perplexity
## [1] 30
# load(file.path(resultsPath, "scRNAseq_results.RData"))
resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
load(file.path(resultsPath,"scRNAseq_results.RData"))library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
library(GeneOverlap) # BiocManager::install("GeneOverlap")
library(monocle) # BiocManager::install("monocle")
# BiocManager::install(c('DelayedArray', 'DelayedMatrixStats', 'org.Hs.eg.db', 'org.Mm.eg.db'))
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
library(enrichR) #BiocManager::install("enrichR")# Convert Seurat objt to CDS object
mDAT <- monocle::importCDS(DAT, import_all = T)
# generate size factors for normalization later
mDAT <- DESeq2::estimateSizeFactors(mDAT)
# Get pre-trained PBMC classifer
load(file.path(root, "Data/Garnett/hsPBMC")) # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC
mDAT <- garnett::classify_cells(mDAT, hsPBMC,
db = org.Hs.eg.db,
cluster_extend = TRUE,
cds_gene_id_type = "SYMBOL")## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
table(pData(mDAT)$cell_type)##
## B cells CD34+ CD4 T cells CD8 T cells
## 13 92 3 1
## Dendritic cells Monocytes NK cells T cells
## 280 15025 801 16
## Unknown
## 2913
table(pData(mDAT)$cluster_ext_type) ##
## B cells CD34+ CD4 T cells Dendritic cells
## 12 17 3 165
## Monocytes NK cells T cells Unknown
## 17165 701 15 1066
# Get feature genes for each cell type
feature_genes <- garnett::get_feature_genes(classifier = hsPBMC,
node = "root",
db = org.Hs.eg.db,
convert_ids = F)
head(feature_genes) ## B cells CD34+ Dendritic cells Monocytes
## (Intercept) -0.884989922 0.47694427 -0.124629734 -0.0301907756
## ENSG00000196154 -0.005499815 -0.01088859 -0.007978714 0.0056365451
## ENSG00000019582 0.030146634 -0.01959215 0.015232191 0.0001883507
## ENSG00000156738 2.193353690 -0.66191294 -1.104899464 -0.7393680112
## ENSG00000177455 2.056641938 -0.71851810 -1.941859710 -1.0448682467
## ENSG00000105369 1.800296145 -0.53835614 0.063380804 -0.1973728377
## NK cells T cells Unknown
## (Intercept) -0.911657164 0.524757370 0.949765956
## ENSG00000196154 -0.001627233 -0.005835858 0.026193667
## ENSG00000019582 -0.005880096 -0.023473024 0.003378099
## ENSG00000156738 -0.096566144 -0.602417257 1.011810124
## ENSG00000177455 0.297279508 0.177790907 1.173533704
## ENSG00000105369 -0.618567693 -0.963141845 0.453761563
# Convert back to Seurat object & save results
DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
write.csv(DAT@meta.data, file.path(resultsPath, "garnett_results.csv"), quote = F,row.names = F)markerList <- c("CD14", "FCGR3A")
markerData <- DAT@scale.data[row.names(DAT@scale.data) %in% markerList,] %>% t() %>% data.frame()
## Efficiently merge using data.table
dt1 <- data.table(markerData, keep.rownames = "Cell", key = "Cell") %>% copy()
dt2 <- data.table(DAT@meta.data[c("cell_type","post_clustering")], keep.rownames = "Cell", key = "Cell") %>% copy()
markerDT <- dt1[dt2]
ggplot(data = markerDT, aes(x=CD14, y=FCGR3A) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="purple", bins = 100, size=.1) +
geom_point(aes(color=as.factor(cell_type)), shape=16, stroke=0, size=2, alpha=.1) +
guides(colour = guide_legend(title="cell_type",override.aes = list(alpha = 1))) tSNE_metadata_plot <- function(var, labSize=12){
# Metadata plot
p1 <- Seurat::TSNEPlot(DAT, do.return = T, do.label = T, group.by = var,label.size = labSize,
plot.title=paste(var), vector.friendly=T) +
theme(legend.position = "top") +
scale_color_brewer( palette = "Dark2", aesthetics = aes(alpha=.5))
# t-SNE clusters plot
p2 <- Seurat::TSNEPlot(DAT, do.return = T, do.label = T,label.size = labSize,
plot.title=paste("Unsupervised Clusters"), vector.friendly=T) +
theme(legend.position = "top") +
scale_color_brewer( palette = "Set1", aesthetics = aes(alpha=.5))
print(cowplot::plot_grid(p1,p2))
} tSNE_metadata_plot("cell_type")tSNE_metadata_plot("cluster_ext_type")tSNE_metadata_plot("mut")FeaturePlot(DAT,features.plot = c("CD14","FCGR3A"),
cols.use = c("grey","red","blue","purple"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F) FeaturePlot(DAT,features.plot = c("LRRK2", "GBA"),
cols.use = c("grey","purple","cyan","blue"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F) FeaturePlot(DAT, features.plot = c("MS4A4A","MS4A6A"),
cols.use = c("grey","red","green","blue"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F)clust0.markers <- FindMarkers(DAT, ident.1=0, min.pct = 0.25, only.pos = F, test.use = "wilcox")
clust1.markers <- FindMarkers(DAT, ident.1=1, min.pct = 0.25, only.pos = F, test.use = "wilcox")
clust0.uniqueMarkers <- clust0.markers[!(row.names(clust0.markers) %in% row.names(clust1.markers)),] %>%
subset(p_val_adj<=0.05)
clust1.uniqueMarkers <- clust1.markers[!(row.names(clust1.markers) %in% row.names(clust0.markers)),] %>%
subset(p_val_adj<=0.05)
difference <- abs( length(row.names(clust0.uniqueMarkers)) - length(row.names(clust1.uniqueMarkers) ) )
uniqueMarkers <- data.frame(Cluster0_markers=c(row.names(clust0.uniqueMarkers), rep("",difference) ),
Cluster1_markers=row.names(clust1.uniqueMarkers))
write.csv(uniqueMarkers,
file.path(resultsPath,"unique_cluster_markers.csv"),
quote = F, row.names = F)# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control")# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA")## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
DGE_within_clusters(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control", clusterList = c(0,1))DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", clusterList = c(0,1))## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
"GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018",
"Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
geneList <- data.frame(Gene=row.names(MonocyteSubtype.markers),
Weight=scales::rescale(length(MonocyteSubtype.markers$p_val_adj):1))
results <- enrichr(genes = geneList, databases = enrichr_dbs ) Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
for (db in enrichr_dbs){
cat('\n')
cat("##",db,"\n")
# res <- subset(results[[db]], Adjusted.P.value<=0.05)
createDT_html(results[[db]], paste("Enrichr Results:",db))
cat('\n')
}